/**********************************************************************/
/*
   Author: Karan Makkar, adapted from Michelle's code
   Created: Nov 2023
   Description: Define regression programs for all outcome regressions.

   Note: to run, run the corresponding master.do file
*/
/**********************************************************************/

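* Define OLS regression
* Inputs (positional, listed in the order the program expects them):
* name: name under which the estimates are stored
* y: outcome variable
* x: treatment indicator
* c: control vars
* precomma: if conditions for cuts (inserted after "if", before the comma)
* absorb: the absorb() option, inserted as given right after the comma
* vce: the vce() option or clustering levels, inserted as given after absorb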

  cap program drop ols_reg
  program define ols_reg
    * Purpose: run one OLS outcome regression with reghdfe, store it with
    * eststo under the given name, and attach the scalar e(control_mean).
    *
    * Positional arguments:
    *   name      name under which the estimates are stored
    *   y         outcome variable
    *   x         treatment indicator (entered as a factor variable)
    *   c         control variables
    *   precomma  condition placed after "if" (must be non-empty)
    *   absorb    option text placed right after the comma
    *   vce       option text placed after absorb
    *
    * e(control_mean) is the mean of y among observations with x == 1 that
    * satisfy precomma, minus the coefficient on 1.x, rounded to 0.001.
    * NOTE(review): that mean is taken over the if-condition only, not over
    * e(sample); confirm this is intended if the regression drops observations.
    version 17.0
    args name y x c precomma absorb vce

    di "Running: OLS Regression for `y' - OLS"
    di "`c(current_time)'"

    * Run regression
    eststo `name' : reghdfe `y' i.`x' `c' if `precomma', `absorb' `vce'

    * Coefficient on the treatment indicator
    local beta_x = _b[1.`x']

    * Mean of the outcome among treated observations. It is copied into a
    * local through an expression so that an empty treated group yields a
    * missing value. Expanding the r() result directly would produce an empty
    * string in that case and the subtraction below would silently turn into
    * a unary minus on the coefficient.
    qui sum `y' if `precomma' & `x' == 1
    local treated_mean = r(mean)

    qui estadd scalar control_mean = round(`treated_mean' - `beta_x', 0.001)

  end

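* Define IV regression
* Inputs (positional, listed in the order the program expects them):
* name: name under which the estimates are stored
* y: outcome variable
* x: treatment indicator (instrumented)
* instrument: instrument for x
* c: controls
* precomma: if conditions for cuts (inserted after "if", before the comma)
* absorb: the absorb() option, inserted as given right after the comma
* cluster: the vce() option or clustering levels, inserted as given after absorb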

  cap program drop iv_reg
  program define iv_reg
    * Purpose: run one IV outcome regression with ivreghdfe, store it with
    * eststo under the given name, and attach the scalar e(control_mean).
    *
    * Positional arguments:
    *   name        name under which the estimates are stored
    *   y           outcome variable
    *   x           treatment indicator (endogenous, entered as a factor variable)
    *   instrument  excluded instrument for x
    *   c           control variables
    *   precomma    condition placed after "if" (must be non-empty)
    *   absorb      option text placed right after the comma
    *   cluster     option text placed after absorb
    *
    * e(control_mean) is the mean of y among observations with x == 1 that
    * satisfy precomma, minus the IV coefficient on 1.x, rounded to 0.001.
    * NOTE(review): that mean is taken over the if-condition only, not over
    * e(sample); confirm this is intended if the regression drops observations.
    version 17.0
    args name y x instrument c precomma absorb cluster

    di "Running: IV Regression for `y'"
    di "`c(current_time)'"

    * Run regression
    eststo `name' : ivreghdfe `y' (i.`x' = `instrument') `c' if `precomma', `absorb' `cluster'

    * IV coefficient on the treatment indicator
    local beta_iv = _b[1.`x']

    * Mean of the outcome among treated observations. It is copied into a
    * local through an expression so that an empty treated group yields a
    * missing value. Expanding the r() result directly would produce an empty
    * string in that case and the subtraction below would silently turn into
    * a unary minus on the coefficient.
    qui sum `y' if `precomma' & `x' == 1
    local treated_mean = r(mean)

    qui estadd scalar control_mean = round(`treated_mean' - `beta_iv', 0.001)

  end

* Define OLS regression with interactions
* Inputs (positional, listed in the order the program expects them):
* name: name of regression (used as the prefix of the stored models)
* y: outcome variable
* x: treatment indicator
* interact: interaction term (must be a dummy)
* c: controls
* precomma: if conditions for cuts (inserted after "if", before the comma)
* absorb: the absorb() option, inserted as given right after the comma
* cluster: the vce() option or clustering levels, inserted as given after absorb
* Returns two models for outputting: "modelname_i0" and "modelname_i1"
* One for interaction = 0 estimate and another for interaction = 1 estimate

  cap program drop ols_reg_interact
  program define ols_reg_interact
    * Purpose: run an OLS regression (reghdfe) of y on the treatment, the
    * treatment-by-dummy interaction and the dummy itself, and store two
    * models for the table code:
    *   name_i0  the full regression; the coefficient on 1.x is the effect
    *            when the dummy equals 0
    *   name_i1  a one-coefficient model whose 1.x entry is the lincom of the
    *            treatment coefficient plus the interaction coefficient
    * Both models carry e(control_mean) and e(obs_bygroup); name_i1 also
    * carries the matrices e(se) and e(nobs).
    *
    * Positional arguments:
    *   name      prefix for the stored models
    *   y         outcome variable
    *   x         treatment indicator
    *   interact  interaction dummy
    *   c         control variables
    *   precomma  condition placed after "if" (must be non-empty)
    *   absorb    option text placed right after the comma
    *   cluster   option text placed after absorb
    *
    * Side effect: (re)creates the dataset variable named x_interact.
    version 17.0
    args name y x interact c precomma absorb cluster

    * Temporary names keep working objects out of the dataset and out of the
    * global scalar and matrix namespaces. They are removed automatically,
    * even when the program stops with an error.
    tempvar insample
    tempname b_mat se_mat n_mat cm_i

    * Generate interaction variables
    capture drop `x'_`interact'
    gen `x'_`interact' = `x' * `interact'
    di "Generated variable `x'_`interact'"

    * Get treated means by group (used for the control means below)
    qui sum `y' if `precomma' & `x' == 1 & `interact' == 1
    local int_1_mean = r(mean)

    qui sum `y' if `precomma' & `x' == 1 & `interact' == 0
    local int_0_mean = r(mean)

    * Run interacted OLS regression
    di "Running: OLS Regression for `y', interacted with `interact'"
    di "`c(current_time)'"
    eststo `name'_i0 : reghdfe `y' i.`x' `x'_`interact' `interact' `c' if `precomma', `absorb' `cluster'

    * Control mean for the interaction = 0 group
    local beta_x = _b[1.`x']
    qui estadd scalar control_mean = round(`int_0_mean' - `beta_x', 0.001)

    * Remember the estimation sample and its size before e() is replaced
    qui gen byte `insample' = e(sample)
    local n_est = e(N)
    di "n = `n_est'"

    count if `insample' == 1 & `interact' == 0
    qui estadd scalar obs_bygroup = r(N)

    * Generate second model with coefficient for interaction = 1
    * This is done to be compatible with the table generating code:
    * "modelname_i0" includes estimate when interaction = 0
    * and "modelname_i1" includes estimate when interaction = 1
    qui lincom 1.`x' + `x'_`interact'
    * Save the lincom results at once so later commands cannot disturb them
    local est_1 = r(estimate)
    local se_1 = r(se)
    scalar `cm_i' = round(`int_1_mean' - `est_1', 0.001)

    * One-by-one matrices, all labelled with the treatment coefficient name
    matrix `b_mat' = (`est_1')
    matrix colnames `b_mat' = 1.`x'
    matrix `se_mat' = (`se_1')
    matrix colnames `se_mat' = 1.`x'
    matrix `n_mat' = (`n_est')
    matrix colnames `n_mat' = 1.`x'

    * Replace e() with the single combined coefficient and attach extras
    ereturn post `b_mat'
    qui estadd matrix se = `se_mat'
    qui estadd matrix nobs = `n_mat'
    qui estadd scalar control_mean = `cm_i'

    count if `insample' == 1 & `interact' == 1
    qui estadd scalar obs_bygroup = r(N)

    eststo `name'_i1

  end

* Define IV regression with interactions
* Inputs (positional, listed in the order the program expects them):
* name: name of regression (used as the prefix of the stored models)
* y: outcome variable
* x: treatment indicator (instrumented)
* instrument: instrument for x
* interact: interaction term (must be a dummy)
* c: controls
* precomma: if conditions for cuts (inserted after "if", before the comma)
* absorb: the absorb() option, inserted as given right after the comma
* cluster: the vce() option or clustering levels, inserted as given after absorb
* Returns two models for outputting: "modelname_i0" and "modelname_i1"
* One for interaction = 0 estimate and another for interaction = 1 estimate

  cap program drop iv_reg_interact
  program define iv_reg_interact
    * Purpose: run an IV regression (ivreghdfe) in which the treatment and the
    * treatment-by-dummy interaction are instrumented by the instrument and
    * the instrument-by-dummy interaction, and store two models for the
    * table code:
    *   name_i0  the full regression; the coefficient on 1.x is the effect
    *            when the dummy equals 0
    *   name_i1  a one-coefficient model whose 1.x entry is the lincom of the
    *            treatment coefficient plus the interaction coefficient
    * Both models carry e(control_mean) and e(obs_bygroup); name_i0 also
    * carries e(pval); name_i1 also carries the matrices e(se) and e(nobs).
    *
    * Positional arguments:
    *   name        prefix for the stored models
    *   y           outcome variable
    *   x           treatment indicator (endogenous)
    *   instrument  excluded instrument for x
    *   interact    interaction dummy
    *   c           control variables
    *   precomma    condition placed after "if" (must be non-empty)
    *   absorb      option text placed right after the comma
    *   cluster     option text placed after absorb
    *
    * Side effects: (re)creates the dataset variables x_interact and
    * instrument_interact, and sets the global macro pval.
    version 17.0
    args name y x instrument interact c precomma absorb cluster

    * Temporary names keep working objects out of the dataset and out of the
    * global scalar and matrix namespaces. They are removed automatically,
    * even when the program stops with an error.
    tempvar insample
    tempname b_mat se_mat n_mat cm_i

    * Generate interaction variables
    capture drop `x'_`interact'
    gen `x'_`interact' = `x' * `interact'
    di "Generated variable `x'_`interact'"

    capture drop `instrument'_`interact'
    gen `instrument'_`interact' = `instrument' * `interact'
    di "Generated variable `instrument'_`interact'"

    * Get treated means (for calculating control complier means)
    qui sum `y' if `precomma' & `x' == 1 & `interact' == 1
    local int_1_mean = r(mean)

    qui sum `y' if `precomma' & `x' == 1 & `interact' == 0
    local int_0_mean = r(mean)

    * Run interacted IV regression
    di "Running: IV Regression for `y', interacted with `interact'"
    di "`c(current_time)'"
    eststo `name'_i0 : ivreghdfe `y' (i.`x' `x'_`interact' = `instrument' `instrument'_`interact') `interact' `c' if `precomma', `absorb' `cluster'

    * Control complier mean for the interaction = 0 group
    local beta_iv = _b[1.`x']
    qui estadd scalar control_mean = round(`int_0_mean' - `beta_iv', 0.001)

    * Remember the estimation sample and its size before e() is replaced
    qui gen byte `insample' = e(sample)
    local n_est = e(N)
    di "n = `n_est'"

    count if `insample' == 1 & `interact' == 0
    qui estadd scalar obs_bygroup = r(N) :`name'_i0

    * Save interaction p-value
    * NOTE(review): the p-value is read from the third row of the esttab
    * coefficient table, which presumably is the interaction term; confirm
    * the row order, since it depends on how the coefficients are listed.
    * The global macro is kept because code outside this program may read it.
    esttab `name'_i0, cells(b p)
    global pval = r(coefs)[3, 2]
    di $pval
    estadd scalar pval $pval :`name'_i0

    * Generate second model with coefficient for interaction = 1
    * This is done to be compatible with the table generating code:
    * "modelname_i0" includes estimate when interaction = 0
    * and "modelname_i1" includes estimate when interaction = 1
    qui lincom 1.`x' + `x'_`interact'
    * Save the lincom results at once so later commands cannot disturb them
    local est_1 = r(estimate)
    local se_1 = r(se)
    scalar `cm_i' = round(`int_1_mean' - `est_1', 0.001)

    * One-by-one matrices, all labelled with the treatment coefficient name
    matrix `b_mat' = (`est_1')
    matrix colnames `b_mat' = 1.`x'
    matrix `se_mat' = (`se_1')
    matrix colnames `se_mat' = 1.`x'
    matrix `n_mat' = (`n_est')
    matrix colnames `n_mat' = 1.`x'

    * Replace e() with the single combined coefficient and attach extras
    ereturn post `b_mat'
    qui estadd matrix se = `se_mat'
    qui estadd matrix nobs = `n_mat'
    qui estadd scalar control_mean = `cm_i'

    count if `insample' == 1 & `interact' == 1
    qui estadd scalar obs_bygroup = r(N)

    eststo `name'_i1

  end

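* Define IV regression with interaction: More than 2 categories
* Inputs (positional, listed in the order the program expects them):
* name: name of regression (used as the prefix of the stored models)
* y: outcome variable
* x: treatment indicator (instrumented)
* instrument: instrument for x
* interact: list of interaction dummies, one per non-base category
* base: dummy for the base category (used only for the base-group treated mean)
* c: controls
* precomma: if conditions for cuts (inserted after "if", before the comma)
* absorb: the absorb() option, inserted as given right after the comma
* cluster: the vce() option or clustering levels, inserted as given after absorb
* Returns one model per category for outputting: "modelname_i0" for the
* estimate when all interaction dummies = 0, then "modelname_i1",
* "modelname_i2", ... for the first, second, ... dummy in interact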

  cap program drop iv_reg_interact_m
  program define iv_reg_interact_m
    * Purpose: IV regression (ivreghdfe) with the treatment interacted with
    * several category dummies. The treatment and every treatment-by-dummy
    * interaction are instrumented by the instrument and the matching
    * instrument-by-dummy interactions. Stored models:
    *   name_i0  the full regression; the coefficient on 1.x is the effect
    *            when every dummy in interact equals 0
    *   name_iK  for the K-th dummy in interact, a one-coefficient model whose
    *            1.x entry is the lincom of the treatment coefficient plus
    *            that dummy's interaction coefficient
    * Every model carries e(control_mean); each name_iK also carries the
    * matrices e(se) and e(nobs).
    *
    * Positional arguments:
    *   name        prefix for the stored models
    *   y           outcome variable
    *   x           treatment indicator (endogenous)
    *   instrument  excluded instrument for x
    *   interact    list of interaction dummies (pass as one quoted argument)
    *   base        dummy marking the base category
    *   c           control variables
    *   precomma    condition placed after "if" (must be non-empty)
    *   absorb      option text placed right after the comma
    *   cluster     option text placed after absorb
    *
    * Side effect: (re)creates one treatment interaction variable and one
    * instrument interaction variable in the dataset per dummy in interact.
    version 17.0
    args name y x instrument interact base c precomma absorb cluster

    * Temporary names keep working matrices and scalars out of the global
    * namespaces
    tempname b_mat se_mat n_mat cm_i

    * Lists of generated interaction variables. Fixed local names are used
    * so that long variable names cannot exceed the macro-name length limit.
    local x_ints
    local z_ints

    * Loop over all interaction vars
    local i = 1
    foreach int of local interact {

      * Generate treatment interaction
      capture drop `x'_`int'
      gen `x'_`int' = `x' * `int'
      di "Generated variable `x'_`int'"
      local x_ints `x_ints' `x'_`int'

      * Generate instrument interaction
      capture drop `instrument'_`int'
      gen `instrument'_`int' = `instrument' * `int'
      di "Generated variable `instrument'_`int'"
      local z_ints `z_ints' `instrument'_`int'

      * Save treated mean of y for this category
      qui sum `y' if `precomma' & `x' == 1 & `int' == 1
      local int_`i'_mean = r(mean)

      local i = `i' + 1
    }

    * Treated mean of y for the base category
    qui sum `y' if `precomma' & `x' == 1 & `base' == 1
    local int_0_mean = r(mean)

    * Run interacted IV regression once. The specification does not depend
    * on the category, so it is estimated a single time and all linear
    * combinations are taken from that one fit.
    di "Running: IV Regression for `y', interacted with `interact'"
    di "`c(current_time)'"
    eststo `name'_i0 : ivreghdfe `y' (i.`x' `x_ints' = `instrument' `z_ints') `interact' `c' if `precomma', `absorb' `cluster'

    * Control complier mean for the base category
    local beta_iv = _b[1.`x']
    qui estadd scalar control_mean = round(`int_0_mean' - `beta_iv', 0.001)
    local n_est = e(N)
    di "n = `n_est'"

    * First pass: collect every combined effect while the regression results
    * are still in e(); posting a new coefficient vector below replaces them
    local i = 1
    foreach int of local interact {
      qui lincom 1.`x' + `x'_`int'
      local est_`i' = r(estimate)
      local se_`i' = r(se)
      local i = `i' + 1
    }
    local n_int = `i' - 1

    * Second pass: generate additional models with coefficient for
    * interaction = 1. This is done to be compatible with the table
    * generating code: "modelname_i0" includes estimate when all
    * interactions = 0 and "modelname_i1" includes estimate when first
    * interaction = 1, and so on.
    forvalues j = 1/`n_int' {
      scalar `cm_i' = round(`int_`j'_mean' - `est_`j'', 0.001)

      * One-by-one matrices, all labelled with the treatment coefficient name
      matrix `b_mat' = (`est_`j'')
      matrix colnames `b_mat' = 1.`x'
      matrix `se_mat' = (`se_`j'')
      matrix colnames `se_mat' = 1.`x'
      matrix `n_mat' = (`n_est')
      matrix colnames `n_mat' = 1.`x'

      ereturn post `b_mat'
      qui estadd matrix se = `se_mat'
      qui estadd matrix nobs = `n_mat'
      qui estadd scalar control_mean = `cm_i'
      eststo `name'_i`j'
    }

  end

* Define Constant regression
* Inputs:
* name: name of regression

  capture program drop constant_reg
  program constant_reg
    * Purpose: store a model under the given name by regressing the variable
    * female on the variable constant, then attach e(control_mean) equal to
    * the mean of the variable missing.
    * NOTE(review): presumably this yields a placeholder column for the
    * tables and missing is an all-missing variable; confirm against the
    * code that builds the data.
    *
    * Positional argument:
    *   name  name under which the estimates are stored
    version 17.0
    args name

    * Fit and store the regression in one step
    eststo `name' : reg female constant

    * Attach the mean of the variable missing as the control mean
    quietly summarize missing
    quietly estadd scalar control_mean = r(mean)

  end

* Define Constant regression with interaction
* Inputs:
* name: name of regression

  capture program drop constant_reg_interact
  program constant_reg_interact
    * Purpose: store two identical models, name_i0 and name_i1, each from a
    * regression of the variable female on the variable constant, and attach
    * to each e(control_mean) equal to the mean of the variable missing.
    * NOTE(review): presumably these are placeholder columns matching the
    * two-model layout of the interacted regressions; confirm against the
    * table code.
    *
    * Positional argument:
    *   name  prefix for the two stored models
    version 17.0
    args name

    * Same steps for both suffixes, in the order i0 then i1
    foreach sfx in i0 i1 {
      * Fit and store the regression in one step
      eststo `name'_`sfx' : reg female constant

      * Attach the mean of the variable missing as the control mean
      quietly summarize missing
      quietly estadd scalar control_mean = r(mean)
    }

  end
// END
